## Sourobh Ghosh, Casey Kearney, Mahdi Hashemian
## Gov 2001 Replication Project

## NOTE: sections are labeled according to which table the replicate in the paper,
## followed by ideas for our extension




####### TABLE 3 ################


require(readstata13)

setwd("C:/Documents CLK/Grad School/Spring 2016/Gov 2001/Replication Paper/Data/Data")  ## modify this as necessary

data = read.dta13("SovietCollaboration_MainDataset.dta")
data.Table5 = read.dta13("SovietCollaboration_CitationToSovietArtDataset.dta")  ## for Table 5
data.spec = read.dta13("SovietCollaboration_SpecializationDataset.dta") ## for Fig 7, Table 6, Table A8-A9

## dropping Soviet subfields
newd = data[-which(data$sovietauthor == 1), ]

##### robust cluster SEs function (http://www.r-bloggers.com/easy-clustered-standard-errors-in-r/)

get_CL_vcov<-function(model, cluster){
  require(sandwich)
  require(lmtest)
  
  #calculate degree of freedom adjustment
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  
  #calculate the uj's
  uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
  
  #use sandwich to get the var-covar matrix
  vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
  return(vcovCL)
}


###### Top and bottom three subfields

## subsetting top 3 bottom 3 dataset (133498 obs)
newt3b3 = subset(newd, newbdrankingposition<=3 | newbdrankingposition>=31, 
                 select=c(logauthourcount, newbdrankingposition, afterdummy, adjustedmsc, year))

## creating dummy for topsoviet
topsovietdummy = rep(0,nrow(newt3b3))
for (i in 1:nrow(newt3b3)){  if (newt3b3$newbdrankingposition[i] < 4){topsovietdummy[i] = 1} }

t3b3 = cbind(newt3b3,topsovietdummy)
fit = lm(logauthourcount ~ topsovietdummy:afterdummy
         + factor(adjustedmsc) + factor(year), data = t3b3)  ## OLS with fixed effects 

## call robust cluster SE function and save the var-cov matrix output in an object
m1.vcovCL<-get_CL_vcov(fit, t3b3$adjustedmsc)

## results
coeftest(fit, m1.vcovCL)  ## coefficient on AfterIronCurtain x SovietRich and corresponding robust cluster SE
summary(fit)$adj.r.squared  ## R-squared

## so column 1 in our coeftest object are the coefficients we want
coefst3b3 <- as.matrix(coeftest(fit, m1.vcovCL)[,1])

dim(coefst3b3)
getwd()
fixed.effects.covariates <- as.matrix(read.csv("replication t3b3 data3.csv"))
fixed.effects.covariates <- fixed.effects.covariates[,-ncol(fixed.effects.covariates)] ## eliminate last column
## which we want to replace with interaction term, not just the top soviet dummy
interactionvariable <- t3b3$afterdummy*t3b3$topsovietdummy ## defining the interaction term
fixed.effects.covariates <- as.matrix(cbind(fixed.effects.covariates,interactionvariable)) ## reassigning matrix
dim(fixed.effects.covariates)

predicted.y <- fixed.effects.covariates%*%coefst3b3
resids <- predicted.y - t3b3$logauthourcount
plot(resids,
     ylim = c(-2,1))
getwd()
setwd("C:/Documents CLK/Grad School/Spring 2016/Gov 2001/Replication Paper/Model Dependence Check")

pdf("Residual Plot by Year.pdf")
plot(t3b3$year,resids,
     xlab = "Year",
     ylab = "Residuals",
     main = "Residual Plot by Year")
abline(v=1991, col = "red")
dev.off()

pdf("Residual Plot by Subject.pdf")
plot(t3b3$adjustedmsc,resids,
     xlab = "Subject Coding",
     ylab = "Residuals",
     main = "Residual Plot by Subject")
dev.off()

pdf("Residual Plot by Interaction Term.pdf")
plot(t3b3$topsovietdummy*t3b3$afterdummy,resids,
     xlab = "Top Soviet and After Iron Curtain Interaction",
     ylab = "Residuals",
     main = "Residual Plot by Interaction Term")
dev.off()


